# Funk, Paul and Philips
# "Point Break: Using Machine Learning to Uncover a Critical Mass in Women’s Representation", Forthcoming at PSRM
#
#
# Last Updated: 7/7/21
# -SI Figure 9: "VIPfull_edu_wleft.pdf"
# -SI Figure 10: "VIPfull_health_wleft.pdf"
# -SI Figure 11: "VIPfull_defense_wleft.pdf"
# -SI Figure 12: "pctwomen_3expenditures_PDP_wleft.pdf"
# -SI Figure 13a: "pctwomen_democracyinteraction_edu_wleft.pdf"
# -SI Figure 13b: "pctwomen_democracyinteraction_health_wleft.pdf"
# -SI Figure 13c: "pctwomen_democracyinteraction_mil_wleft.pdf"
# -SI Figure 14a: "pctwomen_yearinteraction_edu_wleft.pdf"
# -SI Figure 14b: "pctwomen_yearinteraction_health_wleft.pdf"
# -SI Figure 14c: "pctwomen_yearinteraction_mil_wleft.pdf"
# -SI Figure 15: "pctwomen_3expenditures_implementedquotainteraction_wleft.pdf"
# -----------------------------------------------------------------------------
setwd("~/Dropbox/Kendall-Hannah-Andy/final-replication-june2021")
set.seed(590381)

library(randomForest)
library(gbm)
library(pdp)
library(lattice)
library(caret)
library(ggplot2)
library(haven)
library(ranger)
library(viridis)
library(haven)
library(rdd)
library(ALEPlot)
library(scales)
library(lime)       
library(vip)        

wdi.slim <- read_dta('funk-paul-philips-replication.dta') # load data

# ----------- CREATE SMALLER DATASET OF ONLY NEEDED VARIABLES ------------------ #
myvars <- c("percent_education", "percent_military", "percent_health", # our DVs
            "percent_women_Q", #  
            "polity2", # democracy measure
            # Quota variables to use:
            "implementedquota",   # dummy--country has implemented a gender quota in an election. Coded ‘1’ beginning in the year a quota has been implemented in an election -- whether or not the law was followed -- and in all subsequent years, unless the quota is overturned or withdrawn.         
            "defactothreshold",              
            "quotastrength1",               
            "quotastrength2",                
            "quotashock",
            "year", # stuff for ID'ing
            'Agedependencyratioofworki', # ADR
            'Agricultureforestryandfishi', #  Agriculture, forestry, and fishing, value added (% of GDP)
            'Birthratecrudeper1000peo', #  Birth rate, crude (per 1,000 people)
            'Employmenttopopulationratio', #  Employment to population ratio, 15+, total (%) 
            'Fertilityratetotalbirthspe', # Fertility rate, total (births per woman)
            'Foreigndirectinvestmentneti', #  Foreign direct investment, net inflows (% of GDP)
            'GDPgrowthannualNYGDPMK', #  GDP growth (annual %)
            'GDPpercapitaconstant2010US', #  GDP per capita (constant 2010 US$)
            'Importsofgoodsandservices', # Imports of goods and services (% of GDP)
            'InflationGDPdeflatorannual', #  Inflation, GDP deflator (annual %)
            'Laborforceparticipationrate', # Labor force participation rate, female (% of female population ages 15+) 
            'Laborforcefemaleoftotal', #  Labor force, female (% of total labor force)
            'P', #  Labor force participation rate, male (% of male population ages 15+) 
            'Lifeexpectancyatbirthfemale', #  Life expectancy at birth, female (years)
            'Lifeexpectancyatbirthmale', #  Life expectancy at birth, male (years)
            'Lifetimeriskofmaternaldeath', #  Lifetime risk of maternal death (%)
            'Maternalmortalityratiomodele', #  Maternal mortality ratio (modeled estimate, per 100,000 live births) 
            'PopulationgrowthannualSP', #  Population growth (annual %)
            'Populationdensitypeoplepers', #  Population density (people per sq. km of land area)
            'PopulationtotalSPPOPTOTL', #  Population, total
            'Populationfemaleoftotal', #   Population, female (% of total) 
            'Prevalenceofanemiaamongnonp', #  Prevalence of anemia among non-pregnant women (% of women ages 15-49)
            'Ruralpopulationoftotalpop', #  Rural population (% of total population) 
            'Schoolenrollmentprimarygr', #  School enrollment, preprimary (% gross)
            'TradeofGDPNETRDGNFSZS', #  Trade (% of GDP)
            'Unemploymenttotaloftotal', #  Unemployment, total (% of total labor force)
            'Unemploymentmaleofmalela',  # Unemployment, male (% of male labor force)
            'leftgovt') # Majority party in govt

wdi.slim <- wdi.slim[myvars]
wdi.slim <- na.omit(wdi.slim)
nrow(wdi.slim) # 1236 obs
names(wdi.slim)

# turn dichotomous ones into factors:
wdi.slim$implementedquota <- as.factor(wdi.slim$implementedquota)
n.trees <- 300
node.size <- 1 
mtry <- 14
# ------------------------------------------------------------------------------------------ #


# ------------------------- RUN RANDOM FORESTS ON EACH DV ---------------------------------- #
rf.edu <- randomForest(percent_education  ~ . - percent_health - percent_military, data = wdi.slim, mtry = mtry, node.size = node.size, importance = TRUE, ntree = n.trees)

rf.health <- randomForest(percent_health  ~ . - percent_education - percent_military, data = wdi.slim, mtry = mtry, node.size = node.size, importance = TRUE, ntree = n.trees) 

rf.mil <- randomForest(percent_military  ~ . - percent_education - percent_health, data = wdi.slim, mtry = mtry, node.size = node.size, importance = TRUE, ntree = n.trees)
# ------------------------------------------------------------------------------------------ #


# ------------------------- CREATE VARIABLE IMPORTANT PLOTS ---------------------------------- #
# Make VIP's by hand. For defense
vip.dat <- as.data.frame(cbind(names(rf.mil$importance[,1]), rf.mil$importance[,1]/rf.mil$importanceSD))
vip.dat$V2 <- as.numeric(as.character(vip.dat$V2))
vip.dat <- vip.dat[order(-vip.dat$V2),] # sort from largest to smallest
vip.dat.shorty <- vip.dat[1:12,] # and keep only top 12 in case we want a more simplified VIF plot

# Need to label everything:
vip.dat$varnames[vip.dat$V1 == "Populationfemaleoftotal"] <- "% Female Population"
vip.dat$varnames[vip.dat$V1 == "PopulationtotalSPPOPTOTL"] <- "Total Population"
vip.dat$varnames[vip.dat$V1 == "Populationdensitypeoplepers"] <- "Population Density"
vip.dat$varnames[vip.dat$V1 == "Ruralpopulationoftotalpop"] <- "% Rural Population"
vip.dat$varnames[vip.dat$V1 == "Importsofgoodsandservices"] <- "Imports"
vip.dat$varnames[vip.dat$V1 == "TradeofGDPNETRDGNFSZS"] <- "Trade"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthfemale"] <- "Female Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Prevalenceofanemiaamongnonp"] <- "Anemia Prevalence (% of Women)"
vip.dat$varnames[vip.dat$V1 == "percent_women_Q"] <- "% Women in Legislature"
vip.dat$varnames[vip.dat$V1 == "Agedependencyratioofworki"] <- "Age Dependency Ratio"
vip.dat$varnames[vip.dat$V1 == "P"] <- "Male Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "Laborforcefemaleoftotal"] <- "% Labor Force Female"
vip.dat$varnames[vip.dat$V1 == "year"] <- "Year"
vip.dat$varnames[vip.dat$V1 == "Schoolenrollmentprimarygr"] <- "% School Enrollment"
vip.dat$varnames[vip.dat$V1 == "Unemploymenttotaloftotal"] <- "Unemployment Rate"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthmale"] <- "Male Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Unemploymentmaleofmalela"] <-  "Male Unemployment (%)"
vip.dat$varnames[vip.dat$V1 == "Laborforceparticipationrate"] <- "Female Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "GDPpercapitaconstant2010US"] <- "GDP Per Capita"
vip.dat$varnames[vip.dat$V1 == "Agricultureforestryandfishi"] <- "% of GDP from Agriculture"
vip.dat$varnames[vip.dat$V1 == "PopulationgrowthannualSP"] <- "Population Growth"
vip.dat$varnames[vip.dat$V1 == "GDPgrowthannualNYGDPMK"] <- "GDP Growth"
vip.dat$varnames[vip.dat$V1 == "polity2"] <- "Polity"
vip.dat$varnames[vip.dat$V1 == "Employmenttopopulationratio"] <- "Employment to Population Ratio"
vip.dat$varnames[vip.dat$V1 == "Fertilityratetotalbirthspe"] <- "Fertility Rate"
vip.dat$varnames[vip.dat$V1 == "Foreigndirectinvestmentneti"] <- "FDI"
vip.dat$varnames[vip.dat$V1 == "Birthratecrudeper1000peo"] <- "Birth Rate"
vip.dat$varnames[vip.dat$V1 == "Maternalmortalityratiomodele"] <- "Maternal Mortality Ratio"
vip.dat$varnames[vip.dat$V1 == "InflationGDPdeflatorannual"] <- "Inflation"
vip.dat$varnames[vip.dat$V1 == "Lifetimeriskofmaternaldeath"] <- "Maternal Mortality Risk (Lifetime)"
vip.dat$varnames[vip.dat$V1 == "quotastrength1"] <- "Quota Strength (1)"
vip.dat$varnames[vip.dat$V1 == "quotastrength2"] <- "Quota Strength (2)"
vip.dat$varnames[vip.dat$V1 == "implementedquota"] <- "Implemented Quota"
vip.dat$varnames[vip.dat$V1 == "quotashock"] <- "Quota Shock"
vip.dat$varnames[vip.dat$V1 == "defactothreshold"] <- "De Facto Threshold" 
vip.dat$varnames[vip.dat$V1 == "leftgovt"] <- "Largest Government Party" 

# Plot VIFs, sorting by importance and highlighting % women
graph.full <- ggplot(vip.dat, aes(x = reorder(varnames, -V2), y = V2, fill=factor(ifelse(V1=='percent_women_Q', 'Normal', 'Highlighted')))) + geom_bar( stat = 'identity') + theme_minimal() + theme(axis.text.x=element_text(angle=75, hjust=1)) + xlab('') + ylab('% Increase in MSE') + ggtitle('Defense') + theme(legend.position = "none")
pdf(file= 'VIPfull_defense_wleft.pdf', width=8, height=5.5)
graph.full
dev.off()

# For education
vip.dat <- as.data.frame(cbind(names(rf.edu$importance[,1]), rf.edu$importance[,1]/rf.edu$importanceSD))
vip.dat$V2 <- as.numeric(as.character(vip.dat$V2))
vip.dat <- vip.dat[order(-vip.dat$V2),] # sort from largest to smallest
# master name maker for the full dataset:
vip.dat$varnames[vip.dat$V1 == "Populationfemaleoftotal"] <- "% Female Population"
vip.dat$varnames[vip.dat$V1 == "PopulationtotalSPPOPTOTL"] <- "Total Population"
vip.dat$varnames[vip.dat$V1 == "Populationdensitypeoplepers"] <- "Population Density"
vip.dat$varnames[vip.dat$V1 == "Ruralpopulationoftotalpop"] <- "% Rural Population"
vip.dat$varnames[vip.dat$V1 == "Importsofgoodsandservices"] <- "Imports"
vip.dat$varnames[vip.dat$V1 == "TradeofGDPNETRDGNFSZS"] <- "Trade"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthfemale"] <- "Female Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Prevalenceofanemiaamongnonp"] <- "Anemia Prevalence (% of Women)"
vip.dat$varnames[vip.dat$V1 == "percent_women_Q"] <- "% Women in Legislature"
vip.dat$varnames[vip.dat$V1 == "Agedependencyratioofworki"] <- "Age Dependency Ratio"
vip.dat$varnames[vip.dat$V1 == "P"] <- "Male Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "Laborforcefemaleoftotal"] <- "% Labor Force Female"
vip.dat$varnames[vip.dat$V1 == "year"] <- "Year"
vip.dat$varnames[vip.dat$V1 == "Schoolenrollmentprimarygr"] <- "% School Enrollment"
vip.dat$varnames[vip.dat$V1 == "Unemploymenttotaloftotal"] <- "Unemployment Rate"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthmale"] <- "Male Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Unemploymentmaleofmalela"] <-  "Male Unemployment (%)"
vip.dat$varnames[vip.dat$V1 == "Laborforceparticipationrate"] <- "Female Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "GDPpercapitaconstant2010US"] <- "GDP Per Capita"
vip.dat$varnames[vip.dat$V1 == "Agricultureforestryandfishi"] <- "% of GDP from Agriculture"
vip.dat$varnames[vip.dat$V1 == "PopulationgrowthannualSP"] <- "Population Growth"
vip.dat$varnames[vip.dat$V1 == "GDPgrowthannualNYGDPMK"] <- "GDP Growth"
vip.dat$varnames[vip.dat$V1 == "polity2"] <- "Polity"
vip.dat$varnames[vip.dat$V1 == "Employmenttopopulationratio"] <- "Employment to Population Ratio"
vip.dat$varnames[vip.dat$V1 == "Fertilityratetotalbirthspe"] <- "Fertility Rate"
vip.dat$varnames[vip.dat$V1 == "Foreigndirectinvestmentneti"] <- "FDI"
vip.dat$varnames[vip.dat$V1 == "Birthratecrudeper1000peo"] <- "Birth Rate"
vip.dat$varnames[vip.dat$V1 == "Maternalmortalityratiomodele"] <- "Maternal Mortality Ratio"
vip.dat$varnames[vip.dat$V1 == "InflationGDPdeflatorannual"] <- "Inflation"
vip.dat$varnames[vip.dat$V1 == "Lifetimeriskofmaternaldeath"] <- "Maternal Mortality Risk (Lifetime)"
vip.dat$varnames[vip.dat$V1 == "quotastrength1"] <- "Quota Strength (1)"
vip.dat$varnames[vip.dat$V1 == "quotastrength2"] <- "Quota Strength (2)"
vip.dat$varnames[vip.dat$V1 == "implementedquota"] <- "Implemented Quota"
vip.dat$varnames[vip.dat$V1 == "quotashock"] <- "Quota Shock"
vip.dat$varnames[vip.dat$V1 == "defactothreshold"] <- "De Facto Threshold" 
vip.dat$varnames[vip.dat$V1 == "leftgovt"] <- "Largest Government Party" 

graph.full <- ggplot(vip.dat, aes(x = reorder(varnames, -V2), y = V2, fill=factor(ifelse(V1=='percent_women_Q', 'Normal', 'Highlighted')))) + geom_bar(stat = 'identity') + theme_minimal() + theme(axis.text.x=element_text(angle=75, hjust=1)) + xlab('') + ylab('% Increase in MSE') + ggtitle('Education') + theme(legend.position = "none")

pdf(file= 'VIPfull_edu_wleft.pdf', width=8, height=5.5)
graph.full
dev.off()

# For health
vip.dat <- as.data.frame(cbind(names(rf.health$importance[,1]), rf.health$importance[,1]/rf.health$importanceSD))
vip.dat$V2 <- as.numeric(as.character(vip.dat$V2))
vip.dat <- vip.dat[order(-vip.dat$V2),] # sort from largest to smallest

# master name maker for the full dataset:
vip.dat$varnames[vip.dat$V1 == "Populationfemaleoftotal"] <- "% Female Population"
vip.dat$varnames[vip.dat$V1 == "PopulationtotalSPPOPTOTL"] <- "Total Population"
vip.dat$varnames[vip.dat$V1 == "Populationdensitypeoplepers"] <- "Population Density"
vip.dat$varnames[vip.dat$V1 == "Ruralpopulationoftotalpop"] <- "% Rural Population"
vip.dat$varnames[vip.dat$V1 == "Importsofgoodsandservices"] <- "Imports"
vip.dat$varnames[vip.dat$V1 == "TradeofGDPNETRDGNFSZS"] <- "Trade"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthfemale"] <- "Female Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Prevalenceofanemiaamongnonp"] <- "Anemia Prevalence (% of Women)"
vip.dat$varnames[vip.dat$V1 == "percent_women_Q"] <- "% Women in Legislature"
vip.dat$varnames[vip.dat$V1 == "Agedependencyratioofworki"] <- "Age Dependency Ratio"
vip.dat$varnames[vip.dat$V1 == "P"] <- "Male Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "Laborforcefemaleoftotal"] <- "% Labor Force Female"
vip.dat$varnames[vip.dat$V1 == "year"] <- "Year"
vip.dat$varnames[vip.dat$V1 == "Schoolenrollmentprimarygr"] <- "% School Enrollment"
vip.dat$varnames[vip.dat$V1 == "Unemploymenttotaloftotal"] <- "Unemployment Rate"
vip.dat$varnames[vip.dat$V1 == "Lifeexpectancyatbirthmale"] <- "Male Life Expectancy"
vip.dat$varnames[vip.dat$V1 == "Unemploymentmaleofmalela"] <-  "Male Unemployment (%)"
vip.dat$varnames[vip.dat$V1 == "Laborforceparticipationrate"] <- "Female Labor Force Participation Rate"
vip.dat$varnames[vip.dat$V1 == "GDPpercapitaconstant2010US"] <- "GDP Per Capita"
vip.dat$varnames[vip.dat$V1 == "Agricultureforestryandfishi"] <- "% of GDP from Agriculture"
vip.dat$varnames[vip.dat$V1 == "PopulationgrowthannualSP"] <- "Population Growth"
vip.dat$varnames[vip.dat$V1 == "GDPgrowthannualNYGDPMK"] <- "GDP Growth"
vip.dat$varnames[vip.dat$V1 == "polity2"] <- "Polity"
vip.dat$varnames[vip.dat$V1 == "Employmenttopopulationratio"] <- "Employment to Population Ratio"
vip.dat$varnames[vip.dat$V1 == "Fertilityratetotalbirthspe"] <- "Fertility Rate"
vip.dat$varnames[vip.dat$V1 == "Foreigndirectinvestmentneti"] <- "FDI"
vip.dat$varnames[vip.dat$V1 == "Birthratecrudeper1000peo"] <- "Birth Rate"
vip.dat$varnames[vip.dat$V1 == "Maternalmortalityratiomodele"] <- "Maternal Mortality Ratio"
vip.dat$varnames[vip.dat$V1 == "InflationGDPdeflatorannual"] <- "Inflation"
vip.dat$varnames[vip.dat$V1 == "Lifetimeriskofmaternaldeath"] <- "Maternal Mortality Risk (Lifetime)"
vip.dat$varnames[vip.dat$V1 == "quotastrength1"] <- "Quota Strength (1)"
vip.dat$varnames[vip.dat$V1 == "quotastrength2"] <- "Quota Strength (2)"
vip.dat$varnames[vip.dat$V1 == "implementedquota"] <- "Implemented Quota"
vip.dat$varnames[vip.dat$V1 == "quotashock"] <- "Quota Shock"
vip.dat$varnames[vip.dat$V1 == "defactothreshold"] <- "De Facto Threshold" 
vip.dat$varnames[vip.dat$V1 == "leftgovt"] <- "Largest Government Party" 

graph.full <- ggplot(vip.dat, aes(x = reorder(varnames, -V2), y = V2, fill=factor(ifelse(V1=='percent_women_Q', 'Normal', 'Highlighted')))) + geom_bar(stat = 'identity') + theme_minimal() + theme(axis.text.x=element_text(angle=75, hjust=1)) + xlab('') + ylab('% Increase in MSE') + ggtitle('Health') + theme(legend.position = "none")
pdf(file= 'VIPfull_health_wleft.pdf', width=8, height=5.5)
graph.full
dev.off()
# ------------------------------------------------------------------------------------------ #

# ------------------------- CREATE PARTIAL DEPENDENCE PLOTS ---------------------------------- #
# initialize prediction function for each DV
pred <- function(object, newdata) predict(object, newdata)
pdp.edu <- partial(rf.edu, pred.var = 'percent_women_Q', pred.fun = pred)
pdp.health <- partial(rf.health, pred.var = 'percent_women_Q', pred.fun = pred)
pdp.mil <- partial(rf.mil, pred.var = 'percent_women_Q', pred.fun = pred)

# PDPs of each DV, combined together in a single plot:
p1 <- ggplot() + stat_summary(data = pdp.edu, aes(percent_women_Q, yhat), fun.y = mean, geom = 'line', col = 'black', size = 1) + theme_minimal() + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Education') + geom_rug(data =wdi.slim, aes(x = percent_women_Q))
p2 <- ggplot() + stat_summary(data = pdp.health, aes(percent_women_Q, yhat), fun.y = mean, geom = 'line', col = 'black', size = 1) + theme_minimal() + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Health') + geom_rug(data =wdi.slim, aes(x = percent_women_Q))
p3 <- ggplot() + stat_summary(data = pdp.mil, aes(percent_women_Q, yhat), fun.y = mean, geom = 'line', col = 'black', size = 1) + theme_minimal() + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Defense') + geom_rug(data =wdi.slim, aes(x = percent_women_Q))

pdf(file= 'pctwomen_3expenditures_PDP_wleft.pdf', width=8, height=3.5)
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()
# ------------------------------------------------------------------------------------------ #

# ------------------------- CREATE PDPs w/ INTERACTIONS ---------------------------------------- #
# Interaction w/ Democracy: --------
pdp.edu.inter <- partial(rf.edu, pred.var = c('percent_women_Q', 'polity2'), pred.fun = pred, chull = TRUE)
agg.pdp.edu.inter <- aggregate(pdp.edu.inter, by = list(pdp.edu.inter$percent_women_Q, pdp.edu.inter$polity2), FUN=mean)
g1 <- ggplot(agg.pdp.edu.inter, aes(x = percent_women_Q, y = polity2)) +
  geom_tile(aes(x = percent_women_Q, y = polity2, fill = yhat)) +
  scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) + theme_minimal() + xlab('% Women in Legislature') + ylab('Polity Score') + ggtitle('Education') + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(color = guide_legend(override.aes = list(size = 0.3)))

pdp.health.inter <- partial(rf.health, pred.var = c('percent_women_Q', 'polity2'), pred.fun = pred, chull = TRUE)
agg.pdp.health.inter <- aggregate(pdp.health.inter, by = list(pdp.health.inter$percent_women_Q, pdp.health.inter$polity2), FUN=mean)
g2 <- ggplot(agg.pdp.health.inter, aes(x = percent_women_Q, y = polity2)) + geom_tile(aes(x = percent_women_Q, y = polity2, fill = yhat)) + scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) + theme_minimal() + xlab('% Women in Legislature') + ylab('Polity Score') + ggtitle('Health') + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(color = guide_legend(override.aes = list(size = 0.3))) 

pdp.mil.inter <- partial(rf.mil, pred.var = c('percent_women_Q', 'polity2'), pred.fun = pred, chull = TRUE)
agg.pdp.mil.inter <- aggregate(pdp.mil.inter, by = list(pdp.mil.inter$percent_women_Q, pdp.mil.inter$polity2), FUN=mean)
g3 <- ggplot(agg.pdp.mil.inter, aes(x = percent_women_Q, y = polity2)) + geom_tile(aes(x = percent_women_Q, y = polity2, fill = yhat)) + theme_minimal() + xlab('% Women in Legislature') + ylab('Polity Score') + ggtitle('Defense') + scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(barwidth = .32)

# going to plot things separately since it's easier w/ legends and seeing things
pdf(file= 'pctwomen_democracyinteraction_edu_wleft.pdf', width = 4, height = 4)
g1
dev.off()
pdf(file= 'pctwomen_democracyinteraction_health_wleft.pdf', width = 4, height = 4)
g2
dev.off()
pdf(file= 'pctwomen_democracyinteraction_mil_wleft.pdf', width = 4, height = 4)
g3
dev.off()

# Interaction with year ----------------------------
pdp.edu.inter <- partial(rf.edu, pred.var = c('percent_women_Q', 'year'), pred.fun = pred, chull = TRUE)
agg.pdp.edu.inter <- aggregate(pdp.edu.inter, by = list(pdp.edu.inter$percent_women_Q, pdp.edu.inter$year), FUN=mean)
g1 <- ggplot(agg.pdp.edu.inter, aes(x = percent_women_Q, y = year)) + geom_tile(aes(x = percent_women_Q, y = year, fill = yhat)) + scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) +   theme_minimal() + xlab('% Women in Legislature') + ylab('Year') + ggtitle('Education') + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(color = guide_legend(override.aes = list(size = 0.3)))
plot(g1)

pdp.health.inter <- partial(rf.health, pred.var = c('percent_women_Q', 'year'), pred.fun = pred, chull = TRUE)
agg.pdp.health.inter <- aggregate(pdp.health.inter, by = list(pdp.health.inter$percent_women_Q, pdp.health.inter$year), FUN=mean)
g2 <- ggplot(agg.pdp.health.inter, aes(x = percent_women_Q, y = year)) + geom_tile(aes(x = percent_women_Q, y = year, fill = yhat)) + scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) + theme_minimal() + xlab('% Women in Legislature') + ylab('Year') + ggtitle('Health') + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(color = guide_legend(override.aes = list(size = 0.3)))  

pdp.mil.inter <- partial(rf.mil, pred.var = c('percent_women_Q', 'year'), pred.fun = pred, chull = TRUE)
agg.pdp.mil.inter <- aggregate(pdp.mil.inter, by = list(pdp.mil.inter$percent_women_Q, pdp.mil.inter$year), FUN=mean)
g3 <- ggplot(agg.pdp.mil.inter, aes(x = percent_women_Q, y = year)) + geom_tile(aes(x = percent_women_Q, y = year, fill = yhat)) + theme_minimal() + xlab('% Women in Legislature') + ylab('Year') + ggtitle('Defense') + scale_fill_gradientn("Expenditures\n(% of GDP)", colours=c("#0000FFFF","#FF0000FF")) + theme(legend.title = element_text(size=5), legend.text = element_text(size = 5), legend.position = 'bottom') + guides(barwidth = .32)

# going to plot things separately since it's easier w/ legends and seeing things
pdf(file= 'pctwomen_yearinteraction_edu_wleft.pdf', width = 4, height = 4)
g1
dev.off()
pdf(file= 'pctwomen_yearinteraction_health_wleft.pdf', width = 4, height = 4)
g2
dev.off()
pdf(file= 'pctwomen_yearinteraction_mil_wleft.pdf', width = 4, height = 4)
g3
dev.off()

# Across implementedquota (dichotomous) ------------------
# Note: Dashed = implemented quota, solid = no quota
pdp.edu.inter <- partial(rf.edu, pred.var = c('percent_women_Q', 'implementedquota'), pred.fun = pred, chull = TRUE)
pdp.edu.inter$implementedquota <- as.numeric(as.character(pdp.edu.inter$implementedquota))
agg.pdp.edu.inter <- aggregate(pdp.edu.inter, by = list(pdp.edu.inter$percent_women_Q, pdp.edu.inter$implementedquota), FUN=mean)
p1 <- ggplot(agg.pdp.edu.inter, aes(x = percent_women_Q, y = yhat, group = factor(implementedquota))) + geom_line(aes(linetype = factor(implementedquota))) + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Education') + theme_minimal() + theme(legend.position = "none") 

pdp.health.inter <- partial(rf.health, pred.var = c('percent_women_Q', 'implementedquota'), pred.fun = pred, chull = TRUE)
pdp.health.inter$implementedquota <- as.numeric(as.character(pdp.health.inter$implementedquota))
agg.pdp.health.inter <- aggregate(pdp.health.inter, by = list(pdp.health.inter$percent_women_Q, pdp.health.inter$implementedquota), FUN=mean)
p2 <- ggplot(agg.pdp.health.inter, aes(x = percent_women_Q, y = yhat, group = factor(implementedquota))) + geom_line(aes(linetype = factor(implementedquota))) + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Health') + theme_minimal() + theme(legend.position = "none")

pdp.mil.inter <- partial(rf.mil, pred.var = c('percent_women_Q', 'implementedquota'), pred.fun = pred, chull = TRUE)
pdp.mil.inter$implementedquota <- as.numeric(as.character(pdp.mil.inter$implementedquota))
agg.pdp.mil.inter <- aggregate(pdp.mil.inter, by = list(pdp.mil.inter$percent_women_Q, pdp.mil.inter$implementedquota), FUN=mean)
p3 <- ggplot(agg.pdp.mil.inter, aes(x = percent_women_Q, y = yhat, group = factor(implementedquota))) + geom_line(aes(linetype = factor(implementedquota))) + xlab('% Women in Legislature') + ylab('Predicted Value') + ggtitle('Defense') + theme_minimal() + theme(legend.position = "none")

pdf(file= 'pctwomen_3expenditures_implementedquotainteraction_wleft.pdf', width=8, height=3.5)
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()

# ------------------------------------------------------------------------------------------ #

